#setwd("/home/creatrol/ws/R/Tutorials")
library(ggplot2)
library(dplyr)
library(tidyr)
# Preview the data set used for the aesthetic-mapping examples.
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# cyl mapped as-is: treated as a numeric x variable
ggplot(data = mtcars, mapping = aes(x = cyl, y = mpg)) +
  geom_point()
# cyl wrapped in factor(): treated as a discrete x variable
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_point()
# class(mtcars$disp) is numeric, so disp is mapped as a continuous variable
# colour
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, colour = disp)) +
  geom_point()
# size
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = disp)) +
  geom_point()
# shape -- cannot be mapped to a continuous variable, hence factor(cyl)
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, shape = factor(cyl))) +
  geom_point()
# Preview the iris data: four measurement columns plus Species.
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Two point layers on one panel: the first uses the sepal mapping from
# ggplot(), the second overrides x/y with the petal columns and is drawn red.
ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  geom_point(mapping = aes(x = Petal.Length, y = Petal.Width), colour = "red")
# tidy data:
# Long format: every measurement column is stacked into Part.Measure / Value,
# with Species kept as the identifying column.
iris.tidy <- gather(iris, Part.Measure, Value, -Species)
# Split names such as "Sepal.Length" at the literal dot into Part and Measure.
iris.tidy2 <- separate(iris.tidy, Part.Measure, c("Part", "Measure"), sep = "\\.")
#iris.tidy2 <- iris.tidy %>% mutate(Part.Measure = gsub(pattern = "\\.", replacement = "_", x = Part.Measure)) %>%
# separate(Part.Measure, into = c("Part", "Measure"), sep= "_")
# Give every long row a unique key so spread() has unambiguous identifiers.
# seq_len() is used instead of 1:nrow(), which would yield c(1, 0) on an
# empty data frame.
iris.tidy2$row <- seq_len(nrow(iris.tidy2))
# Wide-by-species version; because `row` is unique per observation, each row
# holds a value for one species only and NA for the other two.
iris.tidy <- spread(iris.tidy2, Species, Value)
# Drop the helper key again, selecting the remaining columns by name rather
# than by position.
iris.tidy2 <- iris.tidy2[, c("Species", "Part", "Measure", "Value")]
# plot -- the hard way: one plot per species column of the wide table
str(iris.tidy)
## 'data.frame': 600 obs. of 6 variables:
## $ Part : chr "Petal" "Petal" "Petal" "Petal" ...
## $ Measure : chr "Length" "Length" "Length" "Length" ...
## $ row : int 301 302 303 304 305 306 307 308 309 310 ...
## $ setosa : num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ versicolor: num NA NA NA NA NA NA NA NA NA NA ...
## $ virginica : num NA NA NA NA NA NA NA NA NA NA ...
ggplot(data = iris.tidy,
       mapping = aes(x = Part, y = setosa, colour = Measure)) +
  geom_jitter()
## Warning: Removed 400 rows containing missing values (geom_point).
ggplot(data = iris.tidy,
       mapping = aes(x = Part, y = versicolor, colour = Measure)) +
  geom_jitter()
## Warning: Removed 400 rows containing missing values (geom_point).
ggplot(data = iris.tidy,
       mapping = aes(x = Part, y = virginica, colour = Measure)) +
  geom_jitter()
## Warning: Removed 400 rows containing missing values (geom_point).
# plot -- the easy way: one call on the long table, one facet column per species
str(iris.tidy2)
## 'data.frame': 600 obs. of 4 variables:
## $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Part : chr "Sepal" "Sepal" "Sepal" "Sepal" ...
## $ Measure: chr "Length" "Length" "Length" "Length" ...
## $ Value : num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
ggplot(data = iris.tidy2,
       mapping = aes(x = Part, y = Value, colour = Measure)) +
  geom_jitter() +
  facet_grid(. ~ Species)
| Aesthetic | Description |
|---|---|
| x | X axis position |
| y | Y axis position |
| colour | Colour of dots, outlines of other shapes |
| fill | Fill color |
| size | Diameter of points, thickness of lines |
| alpha | Transparency |
| linetype | Line dash pattern |
| label | Text on a plot or axes |
| shape | Shape of points |
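Shapes with an empty (hollow) interior can be coloured with the "fill" aesthetic.
Palette names are used with scale_color_brewer(), scale_fill_brewer(), scale_colour_distiller() and scale_fill_distiller() through the argument palette = "".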
Positions :
sample : jitter
# Reusable jitter position object (adds random noise to the raw points);
# width = 0.1 sets the amount of horizontal jitter.
posn.j <- position_jitter(width = 0.1)
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point(position = posn.j)
sample : stack & fill & dodge
# Base plot shared by the position examples: cylinder count on x, bars
# filled by transmission type (am).
cyl.am <- ggplot(data = mtcars,
                 mapping = aes(x = factor(cyl), fill = factor(am)))
# no position given
cyl.am + geom_bar()
# groups stacked on top of each other
cyl.am + geom_bar(position = "stack")
# stacked and rescaled so every bar has the same total height
cyl.am + geom_bar(position = "fill")
# groups placed side by side
cyl.am + geom_bar(position = "dodge")
Scale Functions:
sample :
# Customise the x axis (title, limits, breaks, no expansion padding) and the
# colour legend (title and key labels).
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point(position = "jitter") +
  scale_x_continuous(
    "continuous Sepal Length",
    limits = c(2, 8),
    breaks = seq(from = 2, to = 8, by = 3),
    expand = c(0, 0)
  ) +
  # labs() could set the legend title as well, but not the key labels
  scale_color_discrete(
    "Species",
    labels = c("Setosa1", "Versicolour2", "Virginica3")
  )
Optional : alpha, col, fill, shape, size
# Colour is mapped inside the point layer rather than in ggplot(); the
# palette is then set manually, one value per Species level.
ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(mapping = aes(colour = Species)) +
  scale_color_manual(values = c("red", "blue", "green"))
Benefit of layers: each layer can use its own data set, as shown here.
# summary statistics: per-species mean of the four measurement columns.
# Naming the grouping list element labels the group column "Species" directly.
iris.summary <- aggregate(iris[1:4], by = list(Species = iris$Species), FUN = mean)
# Layered plot: raw observations plus the per-species means from iris.summary
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point() +
  # second point layer drawn from the summary table (large, black-filled)
  geom_point(data = iris.summary, shape = 21, size = 5, fill = "black") +
  # dashed vertical reference lines at each species' mean Sepal.Length
  geom_vline(
    data = iris.summary,
    mapping = aes(xintercept = Sepal.Length, colour = Species),
    linetype = 2
  ) +
  # dashed horizontal reference lines at each species' mean Sepal.Width
  geom_hline(
    data = iris.summary,
    mapping = aes(yintercept = Sepal.Width, colour = Species),
    linetype = 2
  )
# Histogram with the default binning
ggplot(data = iris, mapping = aes(x = Sepal.Width)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# width of one bin when the data range is split into 30 bins
diff(range(iris$Sepal.Width)) / 30
## [1] 0.08
# change the bin width
ggplot(data = iris, mapping = aes(x = Sepal.Width)) +
  geom_histogram(binwidth = 0.1)
# change to density
ggplot(data = iris, mapping = aes(x = Sepal.Width)) +
  geom_histogram(mapping = aes(y = ..density..), binwidth = 0.1)
# fill color & position
ggplot(data = iris, mapping = aes(x = Sepal.Width, fill = Species)) +
  geom_histogram(binwidth = 0.1, position = "dodge")
# position given as an object instead of a string
# NOTE(review): width = 6 is much larger than the 0.1 bin width -- confirm
# that this dodge width is intended.
posn.d <- position_dodge(width = 6)
ggplot(data = iris, mapping = aes(x = Sepal.Width, fill = Species)) +
  geom_histogram(binwidth = 0.1, position = posn.d)
# density line
ggplot(data = iris, mapping = aes(x = Sepal.Width, colour = Species)) +
  geom_freqpoly(mapping = aes(y = ..density..), binwidth = 0.1)
# change palette (ColorBrewer "Set1" for the fill scale)
ggplot(iris, aes(x = Sepal.Width, fill = Species)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1")
# change the fill palette manually
ggplot(iris, aes(x = Sepal.Width, fill = Species)) +
  geom_bar() +
  scale_fill_manual(values = c("red", "blue", "green"))
# change the outline (colour) palette manually.
# A colour scale only acts on a mapped colour aesthetic. With fill alone the
# scale below was silently ignored, so Species is mapped to colour as well.
ggplot(iris, aes(x = Sepal.Width, fill = Species, col = Species)) +
  geom_bar() +
  scale_color_manual(values = c("red", "blue", "green"))
# plain counts per distinct Sepal.Width value, no grouping
ggplot(iris, aes(Sepal.Width)) + geom_bar()
# Distribution Bar Plots
# plyr and reshape2 are called through their namespaces instead of being
# attached: attaching plyr after dplyr masks dplyr verbs such as summarise()
# for the rest of the session.
# id.vars is given explicitly so melt() does not have to guess it.
iris_melted <- reshape2::melt(iris, id.vars = "Species",
                              value.name = "Value",
                              variable.name = "Measure")
# Mean and standard deviation of Sepal.Width for each species.
iris_summ <- plyr::ddply(iris_melted[iris_melted$Measure == "Sepal.Width", ],
                         "Species", plyr::summarise,
                         avg = mean(Value), stdev = sd(Value))
str(iris_summ)
## 'data.frame': 3 obs. of 3 variables:
## $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 2 3
## $ avg : num 3.43 2.77 2.97
## $ stdev : num 0.379 0.314 0.322
ggplot(iris_summ, aes(x = Species, y = avg)) +
  # stat = "identity" tells geom_bar to use avg as the bar height, not to count
  geom_bar(stat = "identity", fill = "grey50") +
  # error bars spanning one standard deviation either side of the mean
  geom_errorbar(aes(ymin = avg - stdev, ymax = avg + stdev),
                width = 0.2)
# Structure of the beaver body-temperature data used for the line examples.
str(beaver1)
## 'data.frame': 114 obs. of 4 variables:
## $ day : num 346 346 346 346 346 346 346 346 346 346 ...
## $ time : num 840 850 900 910 920 930 940 950 1000 1010 ...
## $ temp : num 36.3 36.3 36.4 36.4 36.5 ...
## $ activ: num 0 0 0 0 0 0 0 0 0 0 ...
# activity indicator mapped to line colour
ggplot(beaver1, aes(x = time, y = temp, col = factor(activ))) +
  geom_line()
# ... to line type
ggplot(beaver1, aes(x = time, y = temp, linetype = factor(activ))) +
  geom_line()
# ... to line size
ggplot(beaver1, aes(x = time, y = temp, size = factor(activ))) +
  geom_line()
## Warning: Using size for a discrete variable is not advised.
# filled area, rescaled with position = "fill"
ggplot(beaver1, aes(x = time, y = temp, fill = factor(activ))) +
  geom_area(position = "fill")
# semi-transparent ribbon from 0 up to the temperature
ggplot(beaver1, aes(x = time, y = temp, fill = factor(activ))) +
  geom_ribbon(aes(ymax = temp, ymin = 0), alpha = 0.3)
# Highlight a fixed region behind the line.
# annotate() draws the rectangle exactly once. geom_rect() with constant
# aesthetics drew one rectangle per data row, so the stacked copies
# cancelled out alpha = 0.2.
ggplot(beaver1, aes(x = time, y = temp, col = factor(activ))) +
  annotate("rect", xmin = 500, xmax = 1500, ymin = 36.6, ymax = 37.2,
           colour = "grey20", fill = "red", alpha = 0.2) +
  geom_line()
# Shared base plot: the same x mapping drawn three ways.
p <- ggplot(data = iris, mapping = aes(x = Sepal.Width))
# binned counts via the histogram geom
p + geom_histogram()
# counts per distinct value via the bar geom
p + geom_bar()
# binned counts via the stat directly
p + stat_bin()
# Linear fit per species, without the confidence band
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, col = Species)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Local (loess) fit per species plus quantile regression lines.
# `span` is only used by loess; with method = "lm" it was silently ignored,
# so the smoother is switched to loess to match the documented intent.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, col = Species)) +
  geom_point() +
  stat_smooth(method = "loess",
              span = 0.7 # size of the weighted sliding window used to fit the line
  ) +
  stat_quantile()
## Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
# Quantile regression lines only
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, col = Species)) +
  geom_point() +
  stat_quantile()
## Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
# Count overlapping observations at each location
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, col = Species)) +
  geom_point() +
  stat_sum()
# Mean +/- 1 standard deviation per species, drawn with the default geom
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1))
# Same summary built from two layers: a point at the mean and an error bar
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  stat_summary(geom = "point", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    width = 0.1
  )
# bar -- not recommended
ggplot(data = iris, mapping = aes(x = Species, y = Sepal.Length)) +
  stat_summary(geom = "bar", fun.y = mean, fill = "skyblue") +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    width = 0.1
  )
# Only the `mammals` data set is needed from MASS, so it is referenced through
# the namespace. Attaching MASS after dplyr would mask dplyr::select().
mam.new <- data.frame(body = log10(MASS::mammals$body))
# Density histogram of log10 body weight with a rug of the raw values and a
# normal curve using the sample mean and standard deviation.
ggplot(mam.new, aes(x = body)) +
  geom_histogram(aes(y = ..density..)) +
  geom_rug() +
  stat_function(fun = dnorm, colour = "red",
                args = list(mean = mean(mam.new$body),
                            sd = sd(mam.new$body)))
# Original plot: semi-transparent points plus a smoother per species
iris.smooth <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
) +
  geom_point(alpha = 0.7) +
  geom_smooth()
iris.smooth
# scale_x_continuous: out-of-range rows are removed (see the warnings below)
iris.smooth + scale_x_continuous(limits = c(4.5, 5.5))
## Warning: Removed 95 rows containing non-finite values (stat_smooth).
## Warning: Removed 95 rows containing missing values (geom_point).
# xlim: same limits, same warnings
iris.smooth + xlim(4.5, 5.5)
## Warning: Removed 95 rows containing non-finite values (stat_smooth).
## Warning: Removed 95 rows containing missing values (geom_point).
# coord_cartesian: limits set on the coordinate system instead of the scale
iris.smooth + coord_cartesian(xlim = c(4.5, 5.5))
# Rebuild the plot with lines instead of points for the aspect-ratio examples
iris.smooth <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
) +
  geom_line(alpha = 0.7) +
  geom_smooth()
# a 1:1 aspect ratio
iris.smooth + coord_equal()
# fixed ratio of 0.05
iris.smooth + coord_fixed(ratio = 0.05)
# pie chart: binned Sepal.Length counts drawn in polar coordinates
ggplot(data = iris, mapping = aes(x = Sepal.Length, colour = Species)) +
  stat_bin() +
  coord_polar()
# same plot with all text and rectangle theme elements blanked out
ggplot(data = iris, mapping = aes(x = Sepal.Length, colour = Species)) +
  stat_bin() +
  coord_polar() +
  theme(
    text = element_blank(),
    rect = element_blank()
  )
See also theme_update() and theme_set() for changing the default theme.